bitkeeper revision 1.493 (3f84098eCbgC6OdGchmKMJlm84nGJw)
author kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Wed, 8 Oct 2003 12:56:46 +0000 (12:56 +0000)
committer kaf24@scramble.cl.cam.ac.uk <kaf24@scramble.cl.cam.ac.uk>
Wed, 8 Oct 2003 12:56:46 +0000 (12:56 +0000)
memory.c, hypervisor.h, hypervisor-if.h, domain.c, entry.S:
  Faster page fault handling --- fast path in Xen if mapping to be updated is part of current address space.

xen/arch/i386/entry.S
xen/common/domain.c
xen/common/memory.c
xen/include/hypervisor-ifs/hypervisor-if.h
xenolinux-2.4.22-sparse/include/asm-xeno/hypervisor.h
xenolinux-2.4.22-sparse/mm/memory.c

index 3dcbff11c60fe9b3c5952cff21f917fdb691f2e4..2fe19dfb68a8c07151f4d555b740d80b2fb05f4c 100644 (file)
@@ -647,6 +647,7 @@ ENTRY(hypervisor_call_table)
         .long SYMBOL_NAME(do_dom_mem_op)
         .long SYMBOL_NAME(do_multicall)
         .long SYMBOL_NAME(do_kbd_op)
+        .long SYMBOL_NAME(do_update_va_mapping)
         .rept NR_syscalls-((.-hypervisor_call_table)/4)
         .long SYMBOL_NAME(sys_ni_syscall)
        .endr
index 0cd37ec261bb89edaedfb4e1dae36b474a7b0380..29e9cd02f5818ea747920b314ce31e6fafc89872 100644 (file)
@@ -163,6 +163,8 @@ void __kill_domain(struct task_struct *p)
 
 void kill_domain(void)
 {
+    /* May have been in middle of a p.t. update with WP bit cleared. */
+    write_cr0(read_cr0()|X86_CR0_WP);
     __kill_domain(current);
 }
 
index 78c06ce79cfc7581dfc423c5f6be91f9e659c631..af53536a07af2c96b054b047836e0f9c3e997b0a 100644 (file)
@@ -550,9 +550,7 @@ static int mod_l2_entry(l2_pgentry_t *p_l2_entry, l2_pgentry_t new_l2_entry)
                l2_pgentry_val(new_l2_entry)) & 0xfffff001) != 0 )
         {
             if ( (l2_pgentry_val(old_l2_entry) & _PAGE_PRESENT) ) 
-            {
                 put_l1_table(l2_pgentry_to_pagenr(old_l2_entry));
-            }
             
             /* Assume we're mapping an L1 table, falling back to twisted L2. */
             if ( unlikely(get_l1_table(l2_pgentry_to_pagenr(new_l2_entry))) )
@@ -601,15 +599,12 @@ static int mod_l1_entry(l1_pgentry_t *p_l1_entry, l1_pgentry_t new_l1_entry)
                l1_pgentry_val(new_l1_entry)) & 0xfffff003) != 0 )
         {
             if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) ) 
-            {
                 put_page(l1_pgentry_to_pagenr(old_l1_entry),
                          l1_pgentry_val(old_l1_entry) & _PAGE_RW);
-            }
-            
+
             if ( get_page(l1_pgentry_to_pagenr(new_l1_entry),
-                          l1_pgentry_val(new_l1_entry) & _PAGE_RW) ){
+                          l1_pgentry_val(new_l1_entry) & _PAGE_RW) )
                 goto fail;
-            }
         } 
     }
     else if ( (l1_pgentry_val(old_l1_entry) & _PAGE_PRESENT) )
@@ -753,17 +748,12 @@ int do_process_page_updates(page_update_request_t *ureqs, int count)
     struct pfn_info *page;
     int err = 0, i;
     unsigned int cmd;
-    unsigned long cr0 = read_cr0();
-
-    /* Clear the WP bit so that we can write even read-only page mappings. */
-    write_cr0(cr0 & ~X86_CR0_WP);
+    unsigned long cr0 = 0;
 
     for ( i = 0; i < count; i++ )
     {
         if ( copy_from_user(&req, ureqs, sizeof(req)) )
-        {
             kill_domain_with_errmsg("Cannot read page update request");
-        } 
 
         cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
         pfn = req.ptr >> PAGE_SHIFT;
@@ -773,26 +763,23 @@ int do_process_page_updates(page_update_request_t *ureqs, int count)
         spin_lock_irq(&current->page_lock);
 
         /* Get the page-frame number that a non-extended command references. */
-        if ( likely(cmd != PGREQ_EXTENDED_COMMAND) )
+        if ( (cmd == PGREQ_NORMAL_UPDATE) || (cmd == PGREQ_UNCHECKED_UPDATE) )
         {
-            if ( likely(cmd != PGREQ_MPT_UPDATE) )
+            if ( cr0 == 0 )
             {
-                /* Need to use 'get_user' since the VA's PGD may be absent. */
-                __get_user(l1e, (unsigned long *)(linear_pg_table+pfn));
-                /* Now check that the VA's PTE isn't absent. */
-                if ( !(l1e & _PAGE_PRESENT) )
-                {
-                    MEM_LOG("L1E n.p. at VA %08lx (%08lx)", req.ptr&~3, l1e);
-                    goto unlock;
-                }
-                /* Finally, get the underlying machine address. */
-                pfn = l1e >> PAGE_SHIFT;
+                cr0 = read_cr0();
+                write_cr0(cr0 & ~X86_CR0_WP);
             }
-            else if ( pfn >= max_page )
+            /* Need to use 'get_user' since the VA's PGD may be absent. */
+            __get_user(l1e, (unsigned long *)(linear_pg_table+pfn));
+            /* Now check that the VA's PTE isn't absent. */
+            if ( !(l1e & _PAGE_PRESENT) )
             {
-                MEM_LOG("Page out of range (%08lx > %08lx)", pfn, max_page);
+                MEM_LOG("L1E n.p. at VA %08lx (%08lx)", req.ptr&~3, l1e);
                 goto unlock;
             }
+            /* Finally, get the underlying machine address. */
+            pfn = l1e >> PAGE_SHIFT;
         }
 
         /* Least significant bits of 'ptr' demux the operation type. */
@@ -850,7 +837,11 @@ int do_process_page_updates(page_update_request_t *ureqs, int count)
             
         case PGREQ_MPT_UPDATE:
             page = frame_table + pfn;
-            if ( DOMAIN_OKAY(page->flags) )
+            if ( pfn >= max_page )
+            {
+                MEM_LOG("Page out of range (%08lx > %08lx)", pfn, max_page);
+            }
+            else if ( DOMAIN_OKAY(page->flags) )
             {
                 machine_to_phys_mapping[pfn] = req.val;
                 err = 0;
@@ -892,9 +883,77 @@ int do_process_page_updates(page_update_request_t *ureqs, int count)
 
     }
 
-    /* Restore the WP bit before returning to guest. */
-    write_cr0(cr0);
+    if ( cr0 != 0 )
+        write_cr0(cr0);
 
     return 0;
 }
 
+
+/*
+ * Note: This function is structured this way so that the common path is very 
+ * fast. Tests that are unlikely to be TRUE branch to out-of-line code. 
+ * Unfortunately GCC's 'unlikely()' macro doesn't do the right thing :-(
+ */
+int do_update_va_mapping(unsigned long page_nr, 
+                         unsigned long val, 
+                         unsigned long flags)
+{
+    unsigned long _x, cr0 = 0;
+    struct task_struct *p = current;
+    int err = -EINVAL;
+
+    if ( page_nr >= (HYPERVISOR_VIRT_START >> PAGE_SHIFT) )
+        goto out;
+
+    spin_lock_irq(&p->page_lock);
+
+    /* Check that the VA's page-directory entry is present.. */
+    if ( (err = __get_user(_x, (unsigned long *)
+                           (&linear_pg_table[page_nr]))) != 0 )
+        goto unlock_and_out;
+
+    /* If the VA's page-directory entry is read-only, we frob the WP bit. */
+    if ( __put_user(_x, (unsigned long *)(&linear_pg_table[page_nr])) )
+        goto clear_wp; return_from_clear_wp:
+
+    if ( (err = mod_l1_entry(&linear_pg_table[page_nr], 
+                             mk_l1_pgentry(val))) != 0 )
+        goto bad;
+
+    if ( (flags & UVMF_INVLPG) )
+        goto invlpg; return_from_invlpg:
+
+    if ( (flags & UVMF_FLUSH_TLB) )
+        goto flush; return_from_flush:
+
+    if ( cr0 != 0 )
+        goto write_cr0; return_from_write_cr0:
+
+ unlock_and_out:
+    spin_unlock_irq(&p->page_lock);
+ out:
+    return err;
+
+ clear_wp:
+    cr0 = read_cr0();
+    write_cr0(cr0 & ~X86_CR0_WP);        
+    goto return_from_clear_wp;
+
+ bad:
+    spin_unlock_irq(&p->page_lock);
+    kill_domain_with_errmsg("Illegal VA-mapping update request");
+    return 0;
+
+ invlpg:
+    flush_tlb[p->processor] = 1;
+    goto return_from_invlpg;
+    
+ flush:
+    __write_cr3_counted(pagetable_val(p->mm.pagetable));
+    goto return_from_flush;
+
+ write_cr0:
+    write_cr0(cr0);
+    goto return_from_write_cr0;
+}
index e49e3a92658f45c184e66eb6a6663f80821af339..4405191a6dd64186f13966f30cd9afc9ceec1ea2 100644 (file)
@@ -60,6 +60,7 @@
 #define __HYPERVISOR_dom_mem_op                  17
 #define __HYPERVISOR_multicall           18
 #define __HYPERVISOR_kbd_op               19
+#define __HYPERVISOR_update_va_mapping    20
 
 /* And the trap vector is... */
 #define TRAP_INSTR "int $0x82"
 #define PGEXT_CMD_MASK        255
 #define PGEXT_CMD_SHIFT         8
 
+/* These are passed as 'flags' to update_va_mapping. They can be ORed. */
+#define UVMF_FLUSH_TLB          1 /* Flush entire TLB. */
+#define UVMF_INVLPG             2 /* Flush the VA mapping being updated. */
+
 /*
  * Master "switch" for enabling/disabling event delivery.
  */
index 763ff81bfe78849d96b025f4dd4eaf8b458d76bb..b0708dc6c343b5018d33ffe0b5fd2b7d2e180c29 100644 (file)
@@ -369,4 +369,16 @@ static inline long HYPERVISOR_kbd_op(unsigned char op, unsigned char val)
     return ret;
 }
 
+static inline int HYPERVISOR_update_va_mapping(
+    unsigned long page_nr, pte_t new_val, unsigned long flags)
+{
+    int ret;
+    __asm__ __volatile__ (
+        TRAP_INSTR
+        : "=a" (ret) : "0" (__HYPERVISOR_update_va_mapping), 
+        "b" (page_nr), "c" ((new_val).pte_low), "d" (flags) );
+
+    return ret;
+}
+
 #endif /* __HYPERVISOR_H__ */
index bdefce07d1c260c15a148cc55f6057e3fafe3a66..37fb2afd84111a9e47df91095b894bf32feb1bec 100644 (file)
@@ -918,8 +918,18 @@ int remap_page_range(unsigned long from, unsigned long phys_addr, unsigned long
  */
 static inline void establish_pte(struct vm_area_struct * vma, unsigned long address, pte_t *page_table, pte_t entry)
 {
+#ifdef CONFIG_XENO
+       if ( likely(vma->vm_mm == current->mm) ) {
+               XENO_flush_page_update_queue();
+               HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, entry, UVMF_INVLPG);
+       } else {
+               set_pte(page_table, entry);
+               flush_tlb_page(vma, address);
+       }
+#else
        set_pte(page_table, entry);
        flush_tlb_page(vma, address);
+#endif
        update_mmu_cache(vma, address, entry);
 }
 
@@ -1183,11 +1193,20 @@ static int do_swap_page(struct mm_struct * mm,
 
        flush_page_to_ram(page);
        flush_icache_page(vma, page);
+#ifdef CONFIG_XENO
+       if ( likely(vma->vm_mm == current->mm) ) {
+               XENO_flush_page_update_queue();
+               HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, pte, 0);
+       } else {
+               set_pte(page_table, pte);
+               XENO_flush_page_update_queue();
+       }
+#else
        set_pte(page_table, pte);
+#endif
 
        /* No need to invalidate - it was non-present before */
        update_mmu_cache(vma, address, pte);
-       XENO_flush_page_update_queue();
        spin_unlock(&mm->page_table_lock);
        return ret;
 }
@@ -1229,11 +1248,20 @@ static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma,
                mark_page_accessed(page);
        }
 
+#ifdef CONFIG_XENO
+       if ( likely(vma->vm_mm == current->mm) ) {
+               XENO_flush_page_update_queue();
+               HYPERVISOR_update_va_mapping(addr>>PAGE_SHIFT, entry, 0);
+       } else {
+               set_pte(page_table, entry);
+               XENO_flush_page_update_queue();
+       }
+#else
        set_pte(page_table, entry);
+#endif
 
        /* No need to invalidate - it was non-present before */
        update_mmu_cache(vma, addr, entry);
-       XENO_flush_page_update_queue();
        spin_unlock(&mm->page_table_lock);
        return 1;       /* Minor fault */
 
@@ -1304,7 +1332,17 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
                entry = mk_pte(new_page, vma->vm_page_prot);
                if (write_access)
                        entry = pte_mkwrite(pte_mkdirty(entry));
+#ifdef CONFIG_XENO
+               if ( likely(vma->vm_mm == current->mm) ) {
+                       XENO_flush_page_update_queue();
+                       HYPERVISOR_update_va_mapping(address>>PAGE_SHIFT, entry, 0);
+               } else {
+                       set_pte(page_table, entry);
+                       XENO_flush_page_update_queue();
+               }
+#else
                set_pte(page_table, entry);
+#endif
        } else {
                /* One of our sibling threads was faster, back out. */
                page_cache_release(new_page);
@@ -1314,7 +1352,6 @@ static int do_no_page(struct mm_struct * mm, struct vm_area_struct * vma,
 
        /* no need to invalidate: a not-present page shouldn't be cached */
        update_mmu_cache(vma, address, entry);
-       XENO_flush_page_update_queue();
        spin_unlock(&mm->page_table_lock);
        return 2;       /* Major fault */
 }
@@ -1366,7 +1403,6 @@ static inline int handle_pte_fault(struct mm_struct *mm,
        }
        entry = pte_mkyoung(entry);
        establish_pte(vma, address, pte, entry);
-       XENO_flush_page_update_queue();
        spin_unlock(&mm->page_table_lock);
        return 1;
 }